Import and Filter TOC LEEF-2

Some preliminary setup

Show the code
library(LEEF.analysis)
library(LEEF.measurement.toc)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
Show the code
library(tidyr)
library(ggplot2)
library(ggExtra)
options(dplyr.summarise.inform = FALSE)
Show the code
options(RRDdb =  params$db)

# Remove the output of any earlier pre-processing run before starting again.
# The target is a directory, and `unlink()` only deletes directories when
# `recursive = TRUE` is given; without it the call is silently a no-op.
unlink(
  file.path(params$root_folder, "1.pre-processed.data", "toc"),
  recursive = TRUE
)
# Run the TOC pre-processor with the raw data folder as input and the
# pre-processed data folder as output.
pre_processor_toc(
  file.path(params$root_folder, "0.raw.data"),
  file.path(params$root_folder, "1.pre-processed.data")
)
[1] "Log file set to /Volumes/LEEF/0.TOC/LEEF-2//1.pre-processed.data/toc/toc.log"

########################################################

Processing toc
Warning in file.copy(file.path(input, "..", "00.general.parameter", "."), :
problem copying
/Volumes/LEEF/0.TOC/LEEF-2//0.raw.data/../00.general.parameter/. to
/Volumes/LEEF/0.TOC/LEEF-2//1.pre-processed.data/toc/.: No such file or
directory
done

########################################################
Show the code
# Remove the output of any earlier extraction run before starting again.
# The target is a directory, and `unlink()` only deletes directories when
# `recursive = TRUE` is given; without it the call is silently a no-op.
unlink(
  file.path(params$root_folder, "2.extracted.data", "toc"),
  recursive = TRUE
)
# Run the TOC extractor with the pre-processed data folder as input and the
# extracted data folder as output.
extractor_toc(
  file.path(params$root_folder, "1.pre-processed.data"),
  file.path(params$root_folder, "2.extracted.data")
)
Extracting toc
[1] "Log file set to /Volumes/LEEF/0.TOC/LEEF-2//2.extracted.data/toc/toc.log"

########################################################

Extracting toc
Processing L2_20221111A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221114B.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221116A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221118A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221121A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221123A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221125A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221128A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221130A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221130B.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221202A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221205A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221207A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221209A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221212A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221214A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221216A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221219A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221221A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221223A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221226A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221228A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20221230A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230102A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230104A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230106A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230109A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230111A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230113A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230116A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230118A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230120A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230123A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230123B.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230125A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230127A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230130A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230130B.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230201A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230203A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230203B.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230206A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230208A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230210A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230213A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230213B.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230213C.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230215A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230217A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230220A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230222LSCB.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230224A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230224B.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230227A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230301A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230303A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230306A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230308A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230310A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230313A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230315A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
Processing L2_20230317A.txt ...
  |- Splitting file ...
  |- Processing metadata ...
  |- Processing parameter ...
  |- Processing calibration ...
  |- Processing actual data ...
  |- Saving files ...
done

########################################################

Read parameter and data files and determine CV and number extra measurements

Show the code
# Base name (text before the first ".") of every extracted data file.
# `vapply()` guarantees a character vector even when no file is found; the
# result is named by the full file name, and `data` below inherits these names.
fns <- vapply(
  list.files(
    file.path(params$root_folder, "2.extracted.data", "toc"),
    "\\.data\\.",
    full.names = FALSE
  ),
  function(fn) {
    strsplit(fn, "\\.")[[1]][[1]]
  },
  character(1)
)

# For each base name read the matching `<name>.parameter.csv` and
# `<name>.data.csv` and keep, next to both tables, the largest `samples`,
# `extra_samples` and `max_cv` value found in the parameter table.
data <- lapply(
  fns,
  function(fn) {
    toc_dir <- file.path(params$root_folder, "2.extracted.data", "toc")
    p <- read.csv(file.path(toc_dir, paste(fn, "parameter", "csv", sep = ".")))
    d <- read.csv(file.path(toc_dir, paste(fn, "data", "csv", sep = ".")))
    list(
      data = d,
      parameter = p,
      samples = max(p$samples),
      extra_samples = max(p$extra_samples),
      max_cv = max(p$max_cv)
    )
  }
)

# Per-file `samples` value. `plot()` draws as a side effect and returns NULL,
# so its result is not passed on to `print()` (which only printed "NULL").
sapply(data, "[[", "samples") |> plot(ylab = "samples")

NULL
Show the code
# Per-file `extra_samples` value. `plot()` draws as a side effect and returns
# NULL, so its result is not passed on to `print()` (which only printed "NULL").
sapply(data, "[[", "extra_samples") |> plot(ylab = "extra_samples")

NULL
Show the code
# Per-file `max_cv` value. `plot()` draws as a side effect and returns NULL,
# so its result is not passed on to `print()` (which only printed "NULL").
sapply(data, "[[", "max_cv") |> plot(ylab = "max_cv")

NULL
Show the code
# Harmonise the measurement parameters across all files.
# `samples` is the smallest per-file value.
samples <- sapply(data, "[[", "samples") |> min()

# The number of extra samples is experiment specific: the smallest per-file
# value for LEEF_1, the largest for LEEF_2.
if (params$LEEF == "LEEF_1") {
  extra_samples <- sapply(data, "[[", "extra_samples") |> min()
} else if (params$LEEF == "LEEF_2") {
  extra_samples <- sapply(data, "[[", "extra_samples") |> max()
} else {
  stop(
    "Unsupported `params$LEEF` value.",
    " Only `LEEF_1` and `LEEF_2` are currently supported!"
  )
}

# `max_cv` is the largest per-file value.
max_cv <- sapply(data, "[[", "max_cv") |> max()

no_samples <- samples + extra_samples
# Number of leading data columns to keep per file.
# NOTE(review): presumably 2 + 3 fixed columns, two columns per sample and one
# trailing column -- confirm against the extracted data layout.
max_col <- (2 + 3 + no_samples * 2) + 1

As the number of additional samples as well as the cv were different for some samples, we have to re-calculate conc and cv. The values which will be used are

  • samples: 2
  • extra_samples: 2
  • max_cv: 2

In case there are more extra samples, the last ones will be discarded.

Read data and discard more than 2 extra samples

Show the code
# Cut every file's data table down to the first `max_col` columns, pad files
# that have fewer columns with the missing `conc_<k>` columns (filled with NA)
# so that all tables share the same columns, and stack them into one table.
toc <- lapply(
  data,
  function(x) {
    keep <- seq_len(min(max_col, ncol(x$data)))
    # `drop = FALSE` keeps a data frame even if only one column is selected.
    result <- x$data[, keep, drop = FALSE]
    if (ncol(x$data) < max_col) {
      sn <- paste0("conc_", seq_len(extra_samples + samples))
      snm <- sn[!(sn %in% names(result))]
      if (length(snm) > 0) {
        # All missing columns are added in a single assignment.
        result[, snm] <- NA
      }
    }
    result
  }
) |> do.call(what = rbind)
# Running row id over the combined table.
toc$id <- seq_len(nrow(toc))

Remove missing inj_type

We have the following measurement files which contain measurements with an empty or NULL inj_type. These cannot be analysed and need to be excluded.

Show the code
toc %>%
  filter(inj_type == "") %>%
  group_by(filename) %>%
  summarise(n = n()) %>% 
  collect() %>%
  knitr::kable()
filename n
L2_20230130A 4
L2_20230224A 136
L2_20230224B 8
L2_20230227A 8
L2_20230301A 8
L2_20230303A 8
L2_20230306A 8
L2_20230308A 8
L2_20230310A 8
L2_20230313A 16
L2_20230315A 16
L2_20230317A 16

These can be filtered out

Show the code
before <- nrow(toc)
toc <- toc %>%
  filter(inj_type != "")
after <- nrow(toc)
cat("Before : ", before, "\n")
Before :  8068 
Show the code
cat("Removed: ", before - after, "\n")
Removed:  244 
Show the code
cat("After  : ", after, "\n")
After  :  7824 

Remove rows from validation / null samples

In addition to the analysed samples, two more validation and null samples (H2O) were taken. Upon further consideration, these were considered as not useful and excluded from the analysis. They are saved as toc_val_samples.csv.

Show the code
toc %>%
  filter(identification == "H2O" | identification == "2.5mg/lIC") %>%
  group_by(filename) %>%
  summarise(n = n()) %>% 
  collect() %>%
  knitr::kable()
filename n
L2_20221111A 8
L2_20221114B 8
L2_20221116A 8
L2_20221118A 8
L2_20221121A 8
L2_20221123A 8
L2_20221125A 8
L2_20221128A 8
L2_20221130A 8
L2_20221130B 8
L2_20221202A 8
L2_20221205A 8
L2_20221207A 8
L2_20221209A 8
L2_20221212A 8
L2_20221214A 8
L2_20221216A 8
L2_20221219A 8
L2_20221221A 8
L2_20221223A 8
L2_20221226A 8
L2_20221228A 8
L2_20221230A 8
L2_20230102A 8
L2_20230104A 8
L2_20230106A 8
L2_20230109A 8
L2_20230111A 8
L2_20230113A 8
L2_20230116A 8
L2_20230118A 8
L2_20230120A 8
L2_20230123A 8
L2_20230123B 8
L2_20230125A 8
L2_20230127A 8
L2_20230130A 4
L2_20230130B 4
L2_20230201A 8
L2_20230203A 8
L2_20230203B 8
L2_20230206A 8
L2_20230208A 8
L2_20230210A 8
L2_20230213A 8
L2_20230213B 8
L2_20230213C 8
L2_20230215A 8
L2_20230217A 8
L2_20230220A 8
L2_20230222LSCB 8
L2_20230224A 4
L2_20230224B 12
L2_20230227A 16
L2_20230301A 16
L2_20230303A 16
L2_20230306A 16
L2_20230308A 16
L2_20230310A 16
L2_20230313A 24
L2_20230315A 24
L2_20230317A 24

These can be filtered out

Show the code
unlink(file.path(params$root_folder, "2.extracted.data", "toc_val_samples.csv"))
toc %>%
  filter(identification == "H2O" | identification == "2.5mg/lIC") %>%
write.csv(file = file.path(params$root_folder, "2.extracted.data", "toc_val_samples.csv"))
  
before <- nrow(toc)
toc <- toc %>%
  filter(identification != "H2O" & identification != "2.5mg/lIC")
after <- nrow(toc)
cat("Before : ", before, "\n")
Before :  7824 
Show the code
cat("Removed: ", before - after, "\n")
Removed:  584 
Show the code
cat("After  : ", after, "\n")
After  :  7240 

Check timestamp / id / Filename

In each file, there should be at most one timestamp, except for files which contain two timestamps, i.e. which are named accordingly.

Show the code
toc %>%
  select(filename, timestamp, identification, inj_type) %>%
  filter(identification != "H2O" & identification != "2.5mg/lIC") %>%
  group_by(filename, timestamp) %>%
  summarise(timestamps_count = n()) %>%
  group_by(filename) %>%
  summarise(number_of_timestamps = n()) %>%
  arrange(desc(number_of_timestamps)) %>%
  knitr::kable()
filename number_of_timestamps
L2_20221111A 1
L2_20221114B 1
L2_20221116A 1
L2_20221118A 1
L2_20221121A 1
L2_20221123A 1
L2_20221125A 1
L2_20221128A 1
L2_20221130A 1
L2_20221130B 1
L2_20221202A 1
L2_20221205A 1
L2_20221207A 1
L2_20221209A 1
L2_20221212A 1
L2_20221214A 1
L2_20221216A 1
L2_20221219A 1
L2_20221221A 1
L2_20221223A 1
L2_20221226A 1
L2_20221228A 1
L2_20221230A 1
L2_20230102A 1
L2_20230104A 1
L2_20230106A 1
L2_20230109A 1
L2_20230111A 1
L2_20230113A 1
L2_20230116A 1
L2_20230118A 1
L2_20230120A 1
L2_20230123A 1
L2_20230123B 1
L2_20230125A 1
L2_20230127A 1
L2_20230130A 1
L2_20230130B 1
L2_20230201A 1
L2_20230203A 1
L2_20230203B 1
L2_20230206A 1
L2_20230208A 1
L2_20230210A 1
L2_20230213A 1
L2_20230213B 1
L2_20230213C 1
L2_20230215A 1
L2_20230217A 1
L2_20230220A 1
L2_20230222LSCB 1
L2_20230224A 1
L2_20230224B 1
L2_20230227A 1
L2_20230301A 1
L2_20230303A 1
L2_20230306A 1
L2_20230308A 1
L2_20230310A 1
L2_20230313A 1
L2_20230315A 1
L2_20230317A 1

Fixed the following typos in the raw data. Now, it looks OK for me.

  • LEEF_21_12_10 und 21_12_13: typos in Identification
  • LEEF_22_05_18: typo in Identification
  • LEEF_21_11_12: File name should be LEEF_21_11_12 und 21_15_10A
  • LEEF_22_01_19 und 22_01_21A: All analysis for 22_01_21 are not valid

Set unrealistic small TC values to NA

A density plot of the values of the TC measurements in all measurements.

Show the code
# Density of all TC concentrations (conc_1 to conc_3 pooled into one vector).
tc <- toc %>%
  filter(inj_type == "TC")
tc <- data.frame(conc_TC = c(tc$conc_1, tc$conc_2, tc$conc_3))
# The red line marks the TC threshold. It is taken from `params$min_TC`, the
# value the filtering step actually uses, instead of a hard-coded 5, so plot
# and filter cannot drift apart.
tc %>% ggplot(aes(x = conc_TC)) +
  stat_density(bw = 0.1, na.rm = TRUE) +
  geom_vline(xintercept = params$min_TC, col = "red")

One can see for TC two peaks clearly separated. The values smaller than 5 will be set to NA as they are unrealistically low and can be linked to measuring errors in the machine.

Plot the previous plot zoomed in to conc <= 10 for each bottle

conc_1

Show the code
toc %>%
  filter(inj_type == "TC") %>%
  ggplot(aes(x=conc_1)) + 
  geom_vline(xintercept = params$min_TC, col = "red") +
  stat_density(bw = 0.1, na.rm = TRUE) +
  facet_wrap(~bottle, ncol = 1, scales = "free_y")

conc_2

Show the code
toc %>%
  filter(inj_type == "TC") %>%
  ggplot(aes(x=conc_2)) + 
  geom_vline(xintercept = params$min_TC, col = "red") +
  stat_density(bw = 0.1, na.rm = TRUE) +
  facet_wrap(~bottle, ncol = 1, scales = "free_y")

conc_3

Show the code
toc %>%
  filter(inj_type == "TC") %>%
  ggplot(aes(x=conc_3)) + 
  geom_vline(xintercept = params$min_TC, col = "red") +
  stat_density(bw = 0.1, na.rm = TRUE) +
  facet_wrap(~bottle, ncol = 1, scales = "free_y")
Warning: Groups with fewer than two data points have been dropped.
Groups with fewer than two data points have been dropped.
Warning: Removed 2 rows containing missing values (`position_stack()`).

Show the code
# Set TC concentrations below `params$min_TC` to NA.
# `!is.na(x) & x < threshold` is the vectorised form of the element-wise
# `isTRUE(x < threshold)` test: values that are already NA are left untouched.
tc <- toc$inj_type == "TC"
toc$conc_1[tc & !is.na(toc$conc_1) & toc$conc_1 < params$min_TC] <- NA
toc$conc_2[tc & !is.na(toc$conc_2) & toc$conc_2 < params$min_TC] <- NA
toc$conc_3[tc & !is.na(toc$conc_3) & toc$conc_3 < params$min_TC] <- NA

Which are now gone

Show the code
# Density of all TC concentrations (conc_1 to conc_3 pooled) after filtering.
tc <- toc %>%
  filter(inj_type == "TC")
tc <- data.frame(conc_TC = c(tc$conc_1, tc$conc_2, tc$conc_3))
# The red line marks the TC threshold. It is taken from `params$min_TC`, the
# value the filtering step actually uses, instead of a hard-coded 5, so plot
# and filter cannot drift apart.
tc %>% ggplot(aes(x = conc_TC)) +
  stat_density(bw = 0.1, na.rm = TRUE) +
  geom_vline(xintercept = params$min_TC, col = "red")

Set unrealistic small IC values to NA

A density plot of the values of the IC measurements in all measurements.

Show the code
tc <- toc %>%
  filter(inj_type == "IC")
tc <- data.frame(conc_IC = c(tc$conc_1, tc$conc_2, tc$conc_3))
tc %>% 
  ggplot(aes(x=conc_IC)) + 
  geom_vline(xintercept = params$min_IC, col = "red") +
  stat_density(bw = 0.001, na.rm = TRUE)

One can see for IC two peaks clearly separated. The values smaller than 0.3 will be set to NA as they are unrealistically low and can be linked to measuring errors in the machine.

Plot the previous plot zoomed in to conc <= 3 for each bottle

conc_1

Show the code
toc %>%
  filter(inj_type == "IC") %>%
  ggplot(aes(x=conc_1)) + 
  geom_vline(xintercept = params$min_IC, col = "red") +
  stat_density(bw = 0.01, na.rm = TRUE) +
  facet_wrap(~bottle, ncol = 1, scales = "free_y")

conc_2

Show the code
toc %>%
  filter(inj_type == "IC") %>%
  ggplot(aes(x=conc_2)) + 
  geom_vline(xintercept = params$min_IC, col = "red") +
  stat_density(bw = 0.01, na.rm = TRUE) +
  facet_wrap(~bottle, ncol = 1, scales = "free_y")

conc_3

Show the code
toc %>%
  filter(inj_type == "IC") %>%
  ggplot(aes(x=conc_3)) + 
  geom_vline(xintercept = params$min_IC, col = "red") +
  stat_density(bw = 0.01, na.rm = TRUE) +
  facet_wrap(~bottle, ncol = 1, scales = "free_y")
Warning: Groups with fewer than two data points have been dropped.
Groups with fewer than two data points have been dropped.
Groups with fewer than two data points have been dropped.
Groups with fewer than two data points have been dropped.
Groups with fewer than two data points have been dropped.
Warning: Removed 5 rows containing missing values (`position_stack()`).

Show the code
# Set IC concentrations below `params$min_IC` to NA.
# Note: despite its name, `tc` flags the IC rows here.
# `!is.na(x) & x < threshold` is the vectorised form of the element-wise
# `isTRUE(x < threshold)` test: values that are already NA are left untouched.
tc <- toc$inj_type == "IC"
toc$conc_1[tc & !is.na(toc$conc_1) & toc$conc_1 < params$min_IC] <- NA
toc$conc_2[tc & !is.na(toc$conc_2) & toc$conc_2 < params$min_IC] <- NA
toc$conc_3[tc & !is.na(toc$conc_3) & toc$conc_3 < params$min_IC] <- NA

Which are now gone

Show the code
tc <- toc %>%
  filter(inj_type == "IC")
tc <- data.frame(conc_IC = c(tc$conc_1, tc$conc_2, tc$conc_3))
tc %>% ggplot(aes(x=conc_IC)) + 
  geom_vline(xintercept = params$min_IC, col = "red") +
  stat_density(bw = 0.001, na.rm = TRUE)

Recalculate conc and cv for all measurements except TOC

As the number of additional samples as well as the cv were different for some samples, we have to re-calculate conc and cv. The values which will be used are samples: 2, extra_samples: 2 and max_cv: 2. In case there are more extra samples, the last ones will be discarded.

As we are re-calculating TOC later, we can exclude the TOC values here.

Re-calculate conc and cv

Show the code
# Element-wise coefficient of variation (in percent) of value pairs.
#
# For every index `i` the pair `c(x[i], y[i])` is formed and
# `sd(pair) / mean(pair) * 100` is returned.
#
# x, y: numeric vectors; the result has the length of `x`.
# Returns a numeric vector. An element is NA when either value of the pair is
# NA. Empty input returns `numeric(0)`.
cvf <- function(x, y) {
  vapply(
    seq_along(x),
    function(i) {
      pair <- c(x[i], y[i])
      sd(pair) / mean(pair) * 100
    },
    numeric(1)
  )
}


# Coefficient of variation (%) for each pair of the first three measurements.
toc$cv_12 <- cvf(toc$conc_1, toc$conc_2)
toc$cv_13 <- cvf(toc$conc_1, toc$conc_3)
toc$cv_23 <- cvf(toc$conc_2, toc$conc_3)

# Mean concentration for each pair of the first three measurements.
toc$conc_12 <- rowMeans(toc[, c("conc_1", "conc_2")])
toc$conc_13 <- rowMeans(toc[, c("conc_1", "conc_3")])
toc$conc_23 <- rowMeans(toc[, c("conc_2", "conc_3")])

# Re-calculate `conc` and `cv` row by row (TOC rows are skipped here):
#   * if the cv of measurements 1 and 2 is below `max_cv`, use that pair;
#   * otherwise use the pair with the smallest cv;
#   * if no pair has a cv, or more than one of the three measurements is
#     exactly 0, `conc` and `cv` are set to NA.
#
# `conc` is taken from the pre-computed pair means. The previous
# `mean(a, b)` calls passed `b` as the `trim` argument of `mean()` and
# therefore returned `a` alone instead of the mean of the pair.
for (i in seq_len(nrow(toc))) {
  if (toc$inj_type[i] == "TOC") {
    next
  }

  pair_cv <- c(toc$cv_12[i], toc$cv_13[i], toc$cv_23[i])
  pair_conc <- c(toc$conc_12[i], toc$conc_13[i], toc$conc_23[i])

  if (isTRUE(pair_cv[1] < max_cv)) {
    sel <- 1L
  } else {
    # `which.min()` ignores NA / NaN, returns the first minimum on ties and
    # `integer(0)` when no cv is available.
    sel <- which.min(pair_cv)
    n_zero <- sum(
      c(toc$conc_1[i], toc$conc_2[i], toc$conc_3[i]) == 0,
      na.rm = TRUE
    )
    if (length(sel) == 0 || n_zero > 1) {
      toc$conc[i] <- NA
      toc$cv[i] <- NA
      next
    }
  }

  toc$conc[i] <- pair_conc[sel]
  toc$cv[i] <- pair_cv[sel]
}

Calculate TOC

Now we re-calculate the TOC values.

Show the code
# Discard the existing conc values of all TOC rows; they are re-derived below.
toc$conc[which(toc$inj_type == "TOC")] <- NA

# Build a lookup table with one row per analysis (`filename` + `position`):
# the conc values are spread into one column per inj_type, TOC is computed as
# TC - IC, and the key gets the suffix "TOC" so that it only matches the TOC
# rows of `toc` in the join below.
# NOTE(review): assumes each filename / position / inj_type combination occurs
# only once; otherwise `pivot_wider()` would not return plain numeric columns
# -- confirm.
TOC_calc <- toc %>% 
  mutate(an_id = paste(filename, position)) %>%
  pivot_wider(id_cols = an_id, names_from = inj_type, values_from = conc) %>%
  mutate(TOC_calc = TC - IC) %>%
  select(an_id, TOC_calc) %>%
  mutate(an_id = paste(an_id, "TOC"))

# Join the computed values back via the key `filename position inj_type`,
# overwrite conc for the TOC rows only, and drop the helper columns again.
toc <- toc %>% 
  mutate(an_id = paste(filename, position, inj_type)) %>%
  left_join(TOC_calc, by = "an_id") %>%
  mutate( conc = ifelse(inj_type == "TOC", TOC_calc, conc) ) %>%
  select( -TOC_calc, -an_id)

Plot after re-calculation of the TOC values

Show the code
toc %>%   
  ggplot(aes(x=conc)) + 
  stat_density(bw = 0.1, na.rm = TRUE) +
  facet_grid(rows = vars(inj_type), scales = "free_y")

Plot the measured concentration per bottle of all samples

Show the code
toc %>%
  ggplot(aes(x=conc)) + 
  stat_density(bw = 0.1, na.rm = TRUE) +
  facet_grid(rows = vars(bottle), cols = vars(inj_type), scales = "free")

TC

Show the code
# Density of the re-calculated TC concentration, one panel per bottle.
# The red line marks the TC threshold; it is read from `params$min_TC` (as in
# the per-measurement TC plots) instead of being hard-coded to 5.
toc %>%
  filter(inj_type == "TC") %>%
  ggplot(aes(x = conc)) +
  geom_vline(xintercept = params$min_TC, col = "red") +
  stat_density(bw = 0.1, na.rm = TRUE) +
  facet_wrap(~bottle, ncol = 1, scales = "free_y")

IC

Show the code
# Density of the re-calculated IC concentration, one panel per bottle.
# The red line marks the IC threshold; it is read from `params$min_IC` (as in
# the per-measurement IC plots) instead of being hard-coded to 0.3.
toc %>%
  filter(inj_type == "IC") %>%
  ggplot(aes(x = conc)) +
  geom_vline(xintercept = params$min_IC, col = "red") +
  stat_density(bw = 0.01, na.rm = TRUE) +
  facet_wrap(~bottle, ncol = 1, scales = "free_y")

TOC

Show the code
# Density of the re-calculated TOC concentration, one panel per bottle.
# NOTE(review): the reference line at 4.7 is presumably the TC threshold minus
# the IC threshold (5 - 0.3); if so it should be derived from `params$min_TC`
# and `params$min_IC` -- confirm.
toc %>%
  filter(inj_type == "TOC") %>%
  ggplot(aes(x=conc)) + 
  geom_vline(xintercept = 4.7, col = "red") +
  stat_density(bw = 0.1, na.rm = TRUE) +
  facet_wrap(~bottle, ncol = 1, scales = "free_y")

TN

Show the code
toc %>%
  filter(inj_type == "TN") %>%
  ggplot(aes(x=conc)) +
  stat_density(bw = 0.1, na.rm = TRUE) +
  facet_wrap(~bottle, ncol = 1, scales = "free_y")

Add to database

The toc is now added to the database

Show the code
saveRDS(toc, file.path(params$root_folder, "toc.rds"))

add_to_and_overwrite_table_in_RRD(toc = toc, dbname =  params$db)
[1] 0
Show the code
toc_original <- toc

toc$id <- 1:nrow(toc)

Plot after filtering

Now let’s look at the plots of the measurements per bottle per timestep

Show the code
if (params$LEEF == "LEEF_1") {
  p1 <- plot_tocs_per_bottle_per_timestamp(db =  params$db, c("TC", "TOC"))
  p2 <- plot_tocs_per_bottle_per_timestamp(db =  params$db, c("IC"))
  p3 <- plot_tocs_per_bottle_per_timestamp(db =  params$db, c("TN"))
} else {
  p1 <- LEEF_2_plot_tocs_per_bottle_per_timestamp(db =  params$db, c("TC", "TOC"))
  p2 <- LEEF_2_plot_tocs_per_bottle_per_timestamp(db =  params$db, c("IC"))
  p3 <- LEEF_2_plot_tocs_per_bottle_per_timestamp(db =  params$db, c("TN"))
}
Warning: Missing values are always removed in SQL aggregation functions.
Use `na.rm = TRUE` to silence this warning
This warning is displayed once every 8 hours.
Show the code
p1

Show the code
p2

Show the code
p3

Number of measurements per timestamp, bottle and inj_type

We have to look if there are any duplicate measurements.

Show the code
# Find timestamp / bottle / inj_type combinations that were measured more than
# once. Rows without a bottle are ignored. For each combination the
# alphabetically first and last file name, the number of rows and the smallest
# and largest conc are kept; only combinations with 2 to 9 rows are retained.
# `min(conc)` / `max(conc)` are NA as soon as one conc in the group is NA.
dupl <- toc %>%
  group_by(timestamp, bottle, inj_type) %>%
  filter(!is.na(bottle)) %>%
  summarise(fn_1 = min(filename), fn_2 = max(filename), count = n(), min_conc = min(conc), max_conc = max(conc)) %>%
  filter(count > 1 & count < 10) %>%  
  mutate(diff = max_conc - min_conc) %>%
  arrange(inj_type, diff)
# Show the distinct file pairs and timestamps that contain duplicates.
dupl %>% 
  group_by(fn_1, fn_2, timestamp) %>%
  summarise() %>%
  arrange(timestamp) %>%
  knitr::kable()
fn_1 fn_2 timestamp
L2_20221130A L2_20221130B 20221130
L2_20230123A L2_20230123B 20230123
L2_20230130A L2_20230130B 20230130
L2_20230203A L2_20230203B 20230203
L2_20230213A L2_20230213C 20230213

And some plots of the duplicate concentration values only

Show the code
# Read the TOC measurements from the database.
dat <- db_read_toc(db = params$db, duplicates = NULL) %>%
  collect()

# Running row id; `seq_len()` also works for an empty table, so `x` below is
# always defined for the plotting chunks that follow.
dat$id <- seq_len(nrow(dat))

# Ids of the first and last row of every timestamp / bottle / type combination
# that occurs more than once (rows without a bottle are ignored).
ids <- dat %>%
  filter(!is.na(bottle)) %>%
  group_by(timestamp, bottle, type) %>%
  summarise(min_id = min(id), max_id = max(id), n = n()) %>%
  filter(n > 1)
ids <- unique(c(ids$min_id, ids$max_id))

# Smallest (`mic`) and largest (`mac`) concentration per duplicated
# combination and their difference. `summarise()` now returns exactly one row
# per group; the previous version also listed the grouping columns inside
# `summarise()`, which repeated every group once per input row and triggered
# the dplyr >= 1.1.0 deprecation warning.
x <- dat %>%
  filter(id %in% ids) %>%
  group_by(day, timestamp, bottle, type) %>%
  summarise(mic = min(concentration), mac = max(concentration)) %>%
  mutate(mdiff = mac - mic)
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
dplyr 1.1.0.
ℹ Please use `reframe()` instead.
ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
  always returns an ungrouped data frame and adjust accordingly.

TN Duplicates

Show the code
# Scatter plot of the smaller vs. the larger TN concentration of each
# duplicated measurement, with marginal histograms. Skipped when there are no
# duplicates. (Fixes the axis label typo "Smaler".)
if (nrow(x) > 0) {
  pl <- x %>%
    filter(type == "TN") %>%
    ggplot2::ggplot(ggplot2::aes(x = mic, y = mac, colour = type)) +
    ggplot2::geom_point() +
    ggplot2::xlab("Smaller concentration Value") +
    ggplot2::ylab("Larger concentration Value")

  ggMarginal(pl, type = "histogram")
}
Warning: Removed 28 rows containing missing values (`geom_point()`).

TC Duplicates

Show the code
# Scatter plot of the smaller vs. the larger TC concentration of each
# duplicated measurement, with marginal histograms. Skipped when there are no
# duplicates. (Fixes the axis label typo "Smaler".)
if (nrow(x) > 0) {
  pl <- x %>%
    filter(type == "TC") %>%
    ggplot2::ggplot(ggplot2::aes(x = mic, y = mac, colour = type)) +
    ggplot2::geom_point() +
    ggplot2::xlab("Smaller concentration Value") +
    ggplot2::ylab("Larger concentration Value")

  ggMarginal(pl, type = "histogram")
}
Warning: Removed 28 rows containing missing values (`geom_point()`).

IC Duplicates

Show the code
# Scatter plot of the smaller vs. the larger IC concentration of each
# duplicated measurement, with marginal histograms. Skipped when there are no
# duplicates. (Fixes the axis label typo "Smaler".)
if (nrow(x) > 0) {
  pl <- x %>%
    filter(type == "IC") %>%
    ggplot2::ggplot(ggplot2::aes(x = mic, y = mac, colour = type)) +
    ggplot2::geom_point() +
    ggplot2::xlab("Smaller concentration Value") +
    ggplot2::ylab("Larger concentration Value")

  ggMarginal(pl, type = "histogram")
}
Warning: Removed 30 rows containing missing values (`geom_point()`).

TOC Duplicates

Show the code
# Scatter plot of the smaller vs. the larger TOC concentration of each
# duplicated measurement, with marginal histograms. Skipped when there are no
# duplicates. (Fixes the axis label typo "Smaler".)
if (nrow(x) > 0) {
  pl <- x %>%
    filter(type == "TOC") %>%
    ggplot2::ggplot(ggplot2::aes(x = mic, y = mac, colour = type)) +
    ggplot2::geom_point() +
    ggplot2::xlab("Smaller concentration Value") +
    ggplot2::ylab("Larger concentration Value")

  ggMarginal(pl, type = "histogram")
}
Warning: Removed 30 rows containing missing values (`geom_point()`).

Final diagnostic report

Show the code
options(knitr.duplicate.label = "allow")
try(
  report_diagnostic(
    db = params$db,  
    template = params$LEEF,
    suffix = "TOC_added", 
    format = "html",
    lastDays = 7
  )
)


processing file: DiagnosticReport.Rmd

  |                                               
  |                                         |   0%
  |                                               
  |.                                        |   2%                             # A tibble: 1 × 1
  `MAX(timestamp)`
  <chr>           
1 20230407        
Quitting from lines 2-29 (DiagnosticReport.Rmd) 

                                                                                                                       
Error : disk I/O error